//merge application-assignee-inventor-gender data with correspondence (attorney) data

//Load the 2014 correspondence (attorney) address file; it is the master side of the merge below.
use "${rawdata}2014/correspondence_address.dta", clear

//Drop the address and customer-number fields that are not used in this file.
drop correspondence_street_line_1 correspondence_street_line_2 correspondence_city ///
     correspondence_postal_code correspondence_region_code correspondence_country_code ///
     correspondence_country_name customer_number

//Very few applications appear more than once -- keep one entry each.
//A stable sort keeps tied rows in their file order, so the retained row is the same on
//every run. (Plain -bysort- orders ties arbitrarily, which made the kept row irreproducible.)
//The full variable name is used so this does not depend on variable-name abbreviation.
sort application_number, stable
by application_number: gen apptag = _n
drop if apptag > 1
//NOTE: apptag is not dropped anywhere in the visible code, so it stays in the data, as before.

//Attach the correspondence record to every row of the application-level data.
//keep(match using) discards correspondence rows that have no application in the using file
//(the former "drop if _merge==1"); nogenerate replaces the former "drop _merge".
merge 1:m application_number using "${filedata}2014apps_emp_inventors_gender.dta", ///
    keep(match using) nogenerate

//Identify the top-100 and top-50 law firms using the Vault ranking: https://vault.com/best-companies-to-work-for/law/top-100-law-firms-rankings
//(we did this in May 2021)


//Flag firms that are identified by TWO keywords (e.g. "CRAVATH" + "SWAINE").
//For firm slot k (1..114):
//   top1_k tracks matches in correspondence_name_line_1
//   top2_k tracks matches in correspondence_name_line_2
//Each flag is missing (no first-keyword match), 1 (first keyword only) or 2 (both keywords).
//Keywords are matched as substrings via regexm(), so e.g. "STRAW" also matches "STRAWN".
//
//The variables are created interleaved (top1_1 top2_1 top1_2 ...); keep this creation
//order, because range varlists elsewhere in the file depend on dataset order.
forvalues k = 1/114 {
    gen top1_`k' = .
    gen top2_`k' = .
}

//Pass 1: first keyword of each firm. The position in this list is the firm slot and
//must line up with the same position in the second list below.
local slot = 1
foreach kw in ///
    "CRAVATH" "SKADDEN" "WACHTELL" "SULLIVAN" "LATHAM" "KIRKLAND" "DAVIS" "SIMPSON" "GIBSON" "PAUL" /// slots 1-10
    "SIDLEY" "WEIL" "QUINN" "CLEARY" "COVINGTON" "JONES" "WHITE" "DEBEVOISE" "ROPES" "WILLIAMS" /// slots 11-20
    "WILMER" "PAUL" "MORRISON" "O'MELVENY" "HOGAN" "PROSKAUER" "BOIES" "GOODWIN" "AKIN" "ARNOLD" /// slots 21-30
    "BAKER" "ORRICK" "MORGAN" "WILSON" "KING" "MAYER" "WINSTON" "MUNGER" "PERKINS" "WILLKIE" /// slots 31-40
    "CLIFFORD" "BAKER" "FRIED" "SHEARMAN" "ALLEN" "SUSMAN" "GREENBERG" "ALSTON" "CADWALADER" "MCDERMOTT" /// slots 41-50
    "VINSON" "HOLLAND" "JENNER" "PILLSBURY" "REED" "NORTON" "BAKER" "FRESHFIELDS" "CAHILL" "NIXON" /// slots 51-60
    "CROWELL" "IRELL" "FOLEY" "SHEPPARD" "FENWICK" "FISH" "MCGUIRE" "STEPTOE" "SQUIRE" "LOCKE" /// slots 61-70
    "ARENT" "KATTEN" "TROUTMAN" "SCHULTE" "BRYAN" "SEYFARTH" "FOX" "MINTZ" "DAVIS WRIGHT" "HAYNES" /// slots 71-80
    "DUANE" "GUNDERSON" "FAEGRE" "BLANK" "BALLARD" "KRAMER" "FOLEY" "HUGHES" "KELLOGG" "KILPATRICK" /// slots 81-90
    "COZEN" "LITTLER" "FINNEGAN" "KNOBBE" "STERNE" "GREENBERG" "DESMARAIS" "DURIE" "BANNER" "KEKER" /// slots 91-100
    "WOLF" "OBLON" "SUGHRUE" "BRINKS" "MERCHANT" "FROSS" "MCANDREWS" "ROTHWELL" "MARSHALL" "SCHWEGMAN" /// slots 101-110
    "LEYDIG" "KLARQUIST" "PATTERSON" "SEED INTELLECTUAL" {
    disp `slot'
    disp "`kw'"
    replace top1_`slot' = 1 if regexm(correspondence_name_line_1, "`kw'")
    replace top2_`slot' = 1 if regexm(correspondence_name_line_2, "`kw'")
    local slot = `slot' + 1
}

//Pass 2: second keyword of each firm, in the same slot order.
//Adding 1 to a missing flag leaves it missing, so a flag reaches 2 only when BOTH
//keywords of the firm were found in the same name line.
//Spelling fixes: slot 8 is "THACHER" (was "THACER") and slot 23 is "FOERSTER"
//(was "FOESTER"); with the misspellings Simpson Thacher and Morrison & Foerster
//could never be flagged.
local slot = 1
foreach kw in ///
    "SWAINE" "ARPS" "LIPTON" "CROMWELL" "WATKINS" "ELLIS" "POLK" "THACHER" "DUNN" "WEISS" /// slots 1-10
    "AUSTIN" "GOTSHAL" "EMANUEL" "GOTTLIEB" "BURLING" "DAY" "CASE" "PLIMPTON" "GRAY" "CONNOLLY" /// slots 11-20
    "HALE" "HASTINGS" "FOERSTER" "MYERS" "LOVELLS" "ROSE" "SCHILLER" "PROCTER" "GUMP" "PORTER" /// slots 21-30
    "MCKENZIE" "HERRINGTON" "LEWIS" "SONSINI" "SPALDING" "BROWN" "STRAW" "TOLLES" "COIE" "FARR" /// slots 31-40
    "CHANCE" "BOTTS" "FRANK" "STERLING" "OVERY" "GODFREY" "TRAURIG" "BIRD" "WICKERSHAM" "WILL" /// slots 41-50
    "ELKINS" "KNIGHT" "BLOCK" "WINTHROP" "SMITH" "ROSE" "HOSTETLER" "BRUCKHAUS" "GORDON" "PEABODY" /// slots 51-60
    "MORING" "MANELLA" "LARDNER" "MULLIN" "WEST" "RICHARDSON" "WOODS" "JOHNSON" "PATTON" "LORD" /// slots 61-70
    "FOX" "MUCHIN" "PEPPER" "ROTH" "CAVE" "SHAW" "ROTHSCHILD" "LEVIN" "TREMAINE" "BOONE" /// slots 71-80
    "MORRIS" "DETTMER" "DRINKER" "ROME" "SPAHR" "LEVIN" "HOAG" "HUBBARD" "HANSEN" "TOWNSEND" /// slots 81-90
    "CONNOR" "MENDELSON" "HENDERSON" "MARTENS" "KESSLER" "TRAURIG" "DESMARAIS" "TANGRI" "WITCOFF" "VAN NEST" /// slots 91-100
    "GREENFIELD" "MCCLELLAND" "MION" "GILSON" "GOULD" "ZELNICK" "HELD" "FIGG" "GERSTEIN" "LUNDBERG" /// slots 101-110
    "VOIT" "SPARKMAN" "SHERIDAN" "PROPERTY" {
    disp `slot'
    disp "`kw'"
    replace top1_`slot' = top1_`slot' + 1 if regexm(correspondence_name_line_1, "`kw'")
    replace top2_`slot' = top2_`slot' + 1 if regexm(correspondence_name_line_2, "`kw'")
    local slot = `slot' + 1
}


//Firms identified by a single keyword/phrase: top_s is 2 when either correspondence
//name line contains one of these names, and 0 otherwise. The value 2 mirrors the
//"both keywords matched" value of the two-keyword flags, so all flags can share one test.
gen top_s = 0
foreach firm in "COOLEY" "MILBANK" "DLA PIPER" "K&L GATES" "LINKLATERS" "DECHERT" "DENTONS" "VENABLE" {
    replace top_s = 2 if regexm(correspondence_name_line_1, "`firm'") | regexm(correspondence_name_line_2, "`firm'")
}

//
//Collapse the per-firm match flags into two indicators:
//   top100 = 1 when any firm flag equals 2 (two-keyword firm fully matched, or top_s hit)
//   top50  = 1 when a firm in slots 1-46 or 93-114 is fully matched, or the name
//            contains one of five single-name firms listed below
//Requiring the value 2 (both keywords) is what keeps out partial matches such as an
//attorney named Paul or a firm "Baker & <anything>".
//
//The varlists are spelled out explicitly. The previous "top*" wildcard would also pick
//up any unrelated variable starting with "top" from the merged data, and the previous
//ranges (e.g. top1_1-top1_46) follow dataset order, so they silently mixed top1_/top2_
//variables and were only correct because the two results were OR-ed together.
local all_flags
forvalues k = 1/114 {
    local all_flags `all_flags' top1_`k' top2_`k'
}
local top50_flags
foreach k of numlist 1/46 93/114 {
    local top50_flags `top50_flags' top1_`k' top2_`k'
}

//rowmax ignores missing flags; top_s is never missing, so top_max is always defined.
egen top_max = rowmax(`all_flags' top_s)
gen top100 = 0
replace top100 = 1 if top_max == 2

//Same test restricted to the top-50 slots, across both name lines at once.
egen top50_temp = rowmax(`top50_flags')
gen top50 = 0
replace top50 = 1 if top50_temp == 2
foreach firm in "COOLEY" "MILBANK" "DLA PIPER" "K&L GATES" "LINKLATERS" {
    replace top50 = 1 if regexm(correspondence_name_line_1, "`firm'") | regexm(correspondence_name_line_2, "`firm'")
}

//Remove the helper variables; only top100 and top50 are kept.
drop `all_flags' top_s top_max top50_temp

//lawyer = 1 when atty_docket_number is non-empty, 0 otherwise.
gen lawyer = atty_docket_number != ""

//Mark the firm-rank indicators as attorney-based.
rename (top100 top50) (top100_atty top50_atty)

//Write the result. This is inventor*app level data.
save "${filedata}2014apps_emp_lawyer_inventors_gender.dta", replace
